#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 20_02_25_data_preprocessing.Rmd) and clustering (pipeline in 20_02_25_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
  mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
  left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
  mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
  mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`), 
         significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybridMergedSmall'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]


rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)


Relation to external clinical traits


Quantifying module-trait associations


Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.

datTraits = datMeta %>% dplyr::select(Diagnosis, Sex, Age, PMI, RNAExtractionBatch) %>%
            dplyr::rename('ExtractionBatch' = RNAExtractionBatch)

# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)

# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)

# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
  print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated')) 
}
## [1] "2 correlation(s) could not be calculated"
rm(ME_object)

Note: The correlation between Modules #00C1A2, #C19B00 and Diagonsis are the ones that cannot be calculated, weirdly enough, the thing that causes the error is that the initial correlation is too high, so it would be a very bad thing to lose this module because of this numerical error. I’m going to fill in its value using the polyserial function, which doesn’t give exactly the same results as the hetcor() function, but it’s quite similar.

# Calculate the correlation tha failed with hetcor()
moduleTraitCor['ME#00C1A2','Diagnosis'] = polyserial(MEs[,'ME#00C1A2'], datTraits$Diagnosis)
## Warning in polyserial(MEs[, "ME#00C1A2"], datTraits$Diagnosis): initial
## correlation inadmissible, -1.09270710407704, set to -0.9999
moduleTraitCor['ME#C19B00','Diagnosis'] = polyserial(MEs[,'ME#C19B00'], datTraits$Diagnosis)
## Warning in polyserial(MEs[, "ME#C19B00"], datTraits$Diagnosis): initial
## correlation inadmissible, 1.04877229459179, set to 0.9999

I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them

# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)


labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels =  gsub('ME','',rownames(moduleTraitCor)), 
               yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
               textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
               main = paste('Module-Trait relationships'))

diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
                           'MTcor' = moduleTraitCor[,'Diagnosis'],
                           'MTpval' = moduleTraitPvalue[,'Diagnosis'])

genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')

rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)


Studying the modules with the highest absolute correlation to Diagnosis


The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules

top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])

cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #C19B00, #00C1A2
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
                   gene_id = paste0(ID, ' (', external_gene_id, ')'))

table(plot_data$ImportantModules)
## 
## #00C1A2 #C19B00  Others 
##    1315    2141   13035
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
         geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() + 
           ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)




Module Membership vs Gene Significance


SFARI Genes seem to have high levels of Gene Significance and Module Membership in the Modules

create_plot = function(module){
  
  plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
  colnames(plot_data)[2] = 'Module'
  
  SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
  
  p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
               scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,8))) + theme_minimal() + xlab('Module Membership') +
               ggtitle(paste0('Module ', module,'  (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
  
  return(p)
}

create_plot(top_modules[1])
create_plot(top_modules[2])
rm(create_plot)




SFARI Genes


List of SFARI Genes in top modules ordered by SFARI score and Gene Significance

table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
             dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
             dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id, 
                    'SFARI score'=gene.score, 'Gene Significance'=GS)


kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` != 'None') %>% dplyr::select(-Module),
      caption=paste0('SFARI Genes for Module ', top_modules[1]))
SFARI Genes for Module #C19B00
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000141431 ASXL3 0.8785457 1
ENSG00000119335 SET 0.9218670 2
ENSG00000155511 GRIA1 0.8835654 2
ENSG00000169862 CTNND2 0.8586459 2
ENSG00000196557 CACNA1H 0.8552054 2
ENSG00000169432 SCN9A 0.8247991 2
ENSG00000114166 KAT2B 0.7290687 2
ENSG00000147050 KDM6A 0.7203022 2
ENSG00000177565 TBL1XR1 0.6913837 2
ENSG00000021574 SPAST 0.6804570 2
ENSG00000103507 BCKDK 0.5763334 2
ENSG00000179915 NRXN1 0.5254140 2
ENSG00000079482 OPHN1 0.9850829 3
ENSG00000116117 PARD3B 0.9747438 3
ENSG00000109911 ELP4 0.9225894 3
ENSG00000136425 CIB2 0.8722078 3
ENSG00000164050 PLXNB1 0.8657312 3
ENSG00000181722 ZBTB20 0.8349265 3
ENSG00000196839 ADA 0.8235366 3
ENSG00000076716 GPC4 0.8058872 3
ENSG00000131165 CHMP1A 0.8023088 3
ENSG00000162946 DISC1 0.7804632 3
ENSG00000196092 PAX5 0.6988833 3
ENSG00000118432 CNR1 0.6828557 3
ENSG00000205581 HMGN1 0.6671115 3
ENSG00000149571 KIRREL3 0.6477266 3
ENSG00000089006 SNX5 0.5934084 3
ENSG00000139726 DENR 0.5868767 3
ENSG00000177807 KCNJ10 0.5603723 3
ENSG00000100033 PRODH 0.5527687 3
ENSG00000165995 CACNB2 0.5473487 3
ENSG00000164418 GRIK2 0.5458645 3
ENSG00000158321 AUTS2 0.5084040 3
ENSG00000128849 CGNL1 0.4705224 3
ENSG00000137672 TRPC6 0.4518504 3
ENSG00000114062 UBE3A 0.4389561 3
ENSG00000140945 CDH13 0.4193203 3
ENSG00000107077 KDM4C 0.3672602 3
ENSG00000152910 CNTNAP4 0.2005084 3
ENSG00000182985 CADM1 0.9339505 4
ENSG00000147862 NFIB 0.9310289 4
ENSG00000152208 GRID2 0.9112596 4
ENSG00000149403 GRIK4 0.9071851 4
ENSG00000156298 TSPAN7 0.9050752 4
ENSG00000147044 CASK 0.9039260 4
ENSG00000103528 SYT17 0.9028547 4
ENSG00000107104 KANK1 0.8876488 4
ENSG00000129911 KLF16 0.8872244 4
ENSG00000005379 BZRAP1 0.8579167 4
ENSG00000080298 RFX3 0.8542825 4
ENSG00000169439 SDC2 0.8467704 4
ENSG00000115159 GPD2 0.8381263 4
ENSG00000131374 TBC1D5 0.8372299 4
ENSG00000172554 SNTG2 0.8160934 4
ENSG00000183098 GPC6 0.8106561 4
ENSG00000197977 ELOVL2 0.7839541 4
ENSG00000119125 GDA 0.7503866 4
ENSG00000162599 NFIA 0.7478687 4
ENSG00000113100 CDH9 0.7457152 4
ENSG00000071246 VASH1 0.7427122 4
ENSG00000102882 MAPK3 0.7292824 4
ENSG00000105926 MPP6 0.7203155 4
ENSG00000150394 CDH8 0.7175609 4
ENSG00000189221 MAOA 0.7097246 4
ENSG00000151612 ZNF827 0.7063249 4
ENSG00000156011 PSD3 0.7010282 4
ENSG00000111961 SASH1 0.6992862 4
ENSG00000166736 HTR3A 0.6679956 4
ENSG00000164638 SLC29A4 0.6491610 4
ENSG00000137713 PPP2R1B 0.6429055 4
ENSG00000100852 ARHGAP5 0.6413589 4
ENSG00000101958 GLRA2 0.6321659 4
ENSG00000189283 FHIT 0.6091543 4
ENSG00000175745 NR2F1 0.6052473 4
ENSG00000170004 CHD3 0.6035037 4
ENSG00000102781 KATNAL1 0.6026056 4
ENSG00000157152 SYN2 0.5936906 4
ENSG00000070367 EXOC5 0.5676244 4
ENSG00000007314 SCN4A 0.5606462 4
ENSG00000188641 DPYD 0.5426447 4
ENSG00000153266 FEZF2 0.5371557 4
ENSG00000198286 CARD11 0.4951990 4
ENSG00000144036 EXOC6B 0.4711673 4
ENSG00000198690 FAN1 0.4503471 4
ENSG00000153187 HNRNPU 0.4463083 4
ENSG00000224389 C4B 0.4037258 4
ENSG00000145794 MEGF10 0.3904062 4
ENSG00000157890 MEGF11 0.3753489 4
ENSG00000099954 CECR2 0.3398445 4
ENSG00000131771 PPP1R1B 0.2518863 4
ENSG00000070371 CLTCL1 0.1365827 4
ENSG00000147402 GABRQ 0.9598506 5
ENSG00000116852 KIF21B 0.8867727 5
ENSG00000131795 RBM8A 0.8854904 5
ENSG00000211460 TSN 0.8722982 5
ENSG00000163288 GABRB1 0.8722409 5
ENSG00000186297 GABRA5 0.8251354 5
ENSG00000050628 PTGER3 0.8224128 5
ENSG00000100030 MAPK1 0.8199824 5
ENSG00000112038 OPRM1 0.7778386 5
ENSG00000139734 DIAPH3 0.7364629 5
ENSG00000004777 ARHGAP33 0.7354770 5
ENSG00000177791 MYOZ1 0.6793381 5
ENSG00000169855 ROBO1 0.6134188 5
ENSG00000147852 VLDLR 0.5714398 5
ENSG00000124406 ATP8A1 0.5590179 5
ENSG00000172020 GAP43 0.4984025 5
ENSG00000173406 DAB1 0.4866904 5
ENSG00000178568 ERBB4 0.4554541 5
ENSG00000171444 MCC 0.4541775 5
ENSG00000138639 ARHGAP24 0.4530811 5
ENSG00000101843 PSMD10 0.4054877 5
ENSG00000116254 CHD5 0.4045844 5
ENSG00000148730 EIF4EBP2 0.3426925 5
ENSG00000080224 EPHA6 0.2654092 5
ENSG00000261609 GAN 0.2252501 5
ENSG00000117009 KMO 0.0682514 5
ENSG00000066468 FGFR2 0.0635940 5
ENSG00000091831 ESR1 NA 5
ENSG00000164434 FABP7 0.9663938 6
ENSG00000172340 SUCLG2 0.8271255 6
ENSG00000160200 CBS 0.7295583 6
ENSG00000072364 AFF4 0.7268624 6
ENSG00000179603 GRM8 0.6767789 6
ENSG00000075884 ARHGAP15 0.5094652 6
ENSG00000171791 BCL2 0.3409641 6
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` != 'None') %>% dplyr::select(-Module),
      caption=paste0('SFARI Genes for Module ', top_modules[2]))
SFARI Genes for Module #00C1A2
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000136535 TBR1 -0.8840838 1
ENSG00000174469 CNTNAP2 -0.9496760 2
ENSG00000080603 SRCAP -0.8284032 2
ENSG00000155974 GRIP1 -0.7394422 2
ENSG00000169057 MECP2 -0.6872414 2
ENSG00000105976 MET -0.6604030 2
ENSG00000119866 BCL11A -0.6366413 2
ENSG00000124140 SLC12A5 -0.9360019 3
ENSG00000074590 NUAK1 -0.9348188 3
ENSG00000078328 RBFOX1 -0.9186918 3
ENSG00000144285 SCN1A -0.9173283 3
ENSG00000170579 DLGAP1 -0.9137577 3
ENSG00000151150 ANK3 -0.8831013 3
ENSG00000183454 GRIN2A -0.8640176 3
ENSG00000132294 EFR3A -0.8381554 3
ENSG00000157087 ATP2B2 -0.8042493 3
ENSG00000182621 PLCB1 -0.7900910 3
ENSG00000166147 FBN1 -0.7834378 3
ENSG00000197535 MYO5A -0.7768036 3
ENSG00000165300 SLITRK5 -0.7720362 3
ENSG00000132604 TERF2 -0.6862455 3
ENSG00000176884 GRIN1 -0.6792205 3
ENSG00000127616 SMARCA4 -0.6387236 3
ENSG00000146830 GIGYF1 -0.6085437 3
ENSG00000175344 CHRNA7 -0.5995574 3
ENSG00000170396 ZNF804A -0.5809462 3
ENSG00000170471 RALGAPB -0.5332836 3
ENSG00000166148 AVPR1A -0.3651616 3
ENSG00000112655 PTK7 -0.2882900 3
ENSG00000196876 SCN8A NA 3
ENSG00000163618 CADPS -0.9999000 4
ENSG00000159082 SYNJ1 -0.9897629 4
ENSG00000129159 KCNC1 -0.9729424 4
ENSG00000150995 ITPR1 -0.9570730 4
ENSG00000135631 RAB11FIP5 -0.9517540 4
ENSG00000105409 ATP1A3 -0.9390908 4
ENSG00000154263 ABCA10 -0.9299190 4
ENSG00000197635 DPP4 -0.9233889 4
ENSG00000115840 SLC25A12 -0.9230049 4
ENSG00000155886 SLC24A2 -0.9212817 4
ENSG00000006377 DLX6 -0.9139947 4
ENSG00000132639 SNAP25 -0.8940219 4
ENSG00000144406 UNC80 -0.8897701 4
ENSG00000157483 MYO1E -0.8613604 4
ENSG00000102181 CD99L2 -0.8571033 4
ENSG00000172260 NEGR1 -0.8555347 4
ENSG00000165355 FBXO33 -0.8552039 4
ENSG00000115421 PAPOLG -0.8488878 4
ENSG00000144331 ZNF385B -0.8379079 4
ENSG00000144290 SLC4A10 -0.8341455 4
ENSG00000128594 LRRC4 -0.8308582 4
ENSG00000100038 TOP3B -0.8235623 4
ENSG00000163399 ATP1A1 -0.8174485 4
ENSG00000101883 RHOXF1 -0.8112958 4
ENSG00000087460 GNAS -0.8065352 4
ENSG00000047188 YTHDC2 -0.8055959 4
ENSG00000117594 HSD11B1 -0.8000815 4
ENSG00000081803 CADPS2 -0.7852218 4
ENSG00000117016 RIMS3 -0.7802470 4
ENSG00000109906 ZBTB16 -0.7646739 4
ENSG00000014138 POLA2 -0.7381408 4
ENSG00000148935 GAS2 -0.7378531 4
ENSG00000215045 GRID2IP -0.7272142 4
ENSG00000130402 ACTN4 -0.7186294 4
ENSG00000109158 GABRA4 -0.7160039 4
ENSG00000174938 SEZ6L2 -0.7072858 4
ENSG00000184408 KCND2 -0.6836501 4
ENSG00000182901 RGS7 -0.6799908 4
ENSG00000139915 MDGA2 -0.6440130 4
ENSG00000042781 USH2A -0.6424911 4
ENSG00000160862 AZGP1 -0.5968575 4
ENSG00000264424 MYH4 -0.5885198 4
ENSG00000169083 AR -0.0931115 4
ENSG00000152402 GUCY1A2 -0.0444340 4
ENSG00000204466 DGKK -0.9819312 5
ENSG00000145740 SLC30A5 -0.9761635 5
ENSG00000178718 RPP25 -0.9686983 5
ENSG00000174996 KLC2 -0.9683999 5
ENSG00000128739 SNRPN -0.9259603 5
ENSG00000118596 SLC16A7 -0.9142353 5
ENSG00000065989 PDE4A -0.8574696 5
ENSG00000156463 SH3RF2 -0.8562647 5
ENSG00000185666 SYN3 -0.8560706 5
ENSG00000100362 PVALB -0.8269023 5
ENSG00000007171 NOS2 -0.8216243 5
ENSG00000082701 GSK3B -0.8126652 5
ENSG00000140527 WDR93 -0.8066951 5
ENSG00000151148 UBE3B -0.7972316 5
ENSG00000139182 CLSTN3 -0.7821418 5
ENSG00000107147 KCNT1 -0.7780941 5
ENSG00000197381 ADARB1 -0.7529056 5
ENSG00000106113 CRHR2 -0.7453032 5
ENSG00000106123 EPHB6 -0.7341812 5
ENSG00000008735 MAPK8IP2 -0.6747744 5
ENSG00000157168 NRG1 -0.5103462 5
ENSG00000022355 GABRA1 NA 5
ENSG00000104725 NEFL NA 5
ENSG00000171735 CAMTA1 NA 5
ENSG00000213023 SYT3 -0.8957519 6

Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case

plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>% 
            group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>% 
            mutate(p=round(p/n*100,2)) 

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(this_row$hasSFARIscore==FALSE & this_row$p==100){
    new_row = this_row
    new_row$hasSFARIscore = TRUE
    new_row$p = 0
    plot_data = plot_data %>% rbind(new_row)
  }
}

plot_data = plot_data %>% filter(hasSFARIscore==TRUE)

ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
         xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
         theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)




Module Eigengenes


Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.

In all cases, the Eigengenes separate the behaviour between autism and control samples very clearly!

plot_EGs = function(module){

  plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)

  p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
                    ggtitle(paste0('Module ', module, '  (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
  return(p)
}

p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])

grid.arrange(p1, p2, nrow=1)

rm(plot_EGs, p1, p2)




Identifying important genes


Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance

*Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules and not a single one belonging to scores 1 and 2

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
  return(top_genes)
}

top_genes_1 = create_table(top_modules[1])
kable(top_genes_1, caption=paste0('Top 10 genes for module ', top_modules[1], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
Top 10 genes for module #C19B00 (MTcor = 1)
ID external_gene_id MM GS gene.score importance
ENSG00000144369 FAM171B 0.9361619 0.9999000 None 0.9680309
ENSG00000172380 GNG12 0.9159014 0.9999000 None 0.9579007
ENSG00000135299 ANKRD6 0.9138902 0.9969959 None 0.9554431
ENSG00000162734 PEA15 0.9034754 0.9968303 None 0.9501529
ENSG00000181467 RAP2B 0.8998701 0.9999000 None 0.9498850
ENSG00000147402 GABRQ 0.9356259 0.9598506 5 0.9477382
ENSG00000164199 GPR98 0.8923921 0.9999000 None 0.9461460
ENSG00000116117 PARD3B 0.9115571 0.9747438 3 0.9431504
ENSG00000046653 GPM6B 0.9454595 0.9399552 None 0.9427074
ENSG00000178252 WDR6 0.9056134 0.9736618 None 0.9396376
ENSG00000163110 PDLIM5 0.9252966 0.9439713 None 0.9346339
ENSG00000137033 IL33 0.8700031 0.9959383 None 0.9329707
ENSG00000135052 GOLM1 0.9148458 0.9508269 None 0.9328364
ENSG00000121766 ZCCHC17 0.8768631 0.9845722 None 0.9307176
ENSG00000007372 PAX6 0.8708351 0.9850589 None 0.9279470
ENSG00000077721 UBE2A 0.8449761 0.9999000 None 0.9224380
ENSG00000145555 MYO10 0.8648672 0.9761415 None 0.9205044
ENSG00000204060 FOXO6 0.8496752 0.9886059 None 0.9191406
ENSG00000103876 FAH 0.8343395 0.9999000 None 0.9171197
ENSG00000100568 VTI1B 0.9430318 0.8893725 None 0.9162022
top_genes_2 = create_table(top_modules[2])
kable(top_genes_2, caption=paste0('Top 10 genes for module ', top_modules[2], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
Top 10 genes for module #00C1A2 (MTcor = -1)
ID external_gene_id MM GS gene.score importance
ENSG00000215397 SCRT2 0.9325901 -0.9999000 None 0.9662450
ENSG00000141314 RHBDL3 0.9162767 -0.9903965 None 0.9533366
ENSG00000188582 PAQR9 0.9115714 -0.9916901 None 0.9516307
ENSG00000164588 HCN1 0.9375144 -0.9654055 None 0.9514599
ENSG00000185133 INPP5J 0.9146103 -0.9860537 None 0.9503320
ENSG00000248905 FMN1 0.9006433 -0.9999000 None 0.9502717
ENSG00000162694 EXTL2 0.9130878 -0.9839885 None 0.9485382
ENSG00000113327 GABRG2 0.9008442 -0.9918740 None 0.9463591
ENSG00000029534 ANK1 0.8946657 -0.9969839 None 0.9458248
ENSG00000051382 PIK3CB 0.8974280 -0.9941465 None 0.9457872
ENSG00000159840 ZYX 0.8993040 -0.9921196 None 0.9457118
ENSG00000115977 AAK1 0.8958363 -0.9954082 None 0.9456223
ENSG00000170049 KCNAB3 0.9193674 -0.9706054 None 0.9449864
ENSG00000164114 MAP9 0.8929287 -0.9941216 None 0.9435252
ENSG00000118596 SLC16A7 0.9710280 -0.9142353 5 0.9426317
ENSG00000141750 STAC2 0.8863954 -0.9973965 None 0.9418960
ENSG00000163624 CDS1 0.8821721 -0.9999000 None 0.9410360
ENSG00000198825 INPP5F 0.8759157 -0.9999000 None 0.9379078
ENSG00000185760 KCNQ5 0.9249319 -0.9488275 None 0.9368797
ENSG00000124198 ARFGEF2 0.8773453 -0.9961151 None 0.9367302
rm(create_table)
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & 
                                  ID %in% c(as.character(top_genes_1$ID), 
                                            as.character(top_genes_2$ID)), 1, 0.05))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              theme_minimal() + ggtitle('Important genes identified through WGCNA')




Enrichment Analysis


Using the package anRichment

# Prepare dataset

# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID,
                        module = ifelse(genes_info$Module %in% top_modules, genes_info$Module, 'other')) %>%
             filter(genes_info$Module!='gray')

# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
## Batch submitting query [=>------------------------] 6% eta: 14s
## Batch submitting query [==>-----------------------] 12% eta: 11s
## Batch submitting query [====>---------------------] 18% eta: 8s
## Batch submitting query [=====>--------------------] 24% eta: 7s
## Batch submitting query [=======>------------------] 29% eta: 7s
## Batch submitting query [========>-----------------] 35% eta: 6s
## Batch submitting query [==========>---------------] 41% eta: 5s
## Batch submitting query [===========>--------------] 47% eta: 5s
## Batch submitting query [=============>------------] 53% eta: 4s
## Batch submitting query [==============>-----------] 59% eta: 3s
## Batch submitting query [================>---------] 65% eta: 3s
## Batch submitting query [=================>--------] 71% eta: 2s
## Batch submitting query [===================>------] 76% eta: 2s Batch
## submitting query [====================>-----] 82% eta: 1s Batch submitting
## query [======================>---] 88% eta: 1s Batch submitting query
## [=======================>--] 94% eta: 0s
EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')

for(tm in top_modules){
  cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
             tm, ' don\'t have an Entrez Gene ID')) 
}
## 
## 33 genes from top module #C19B00 don't have an Entrez Gene ID
## 17 genes from top module #00C1A2 don't have an Entrez Gene ID
rm(getinfo, mart, biomart_output, tm)
# Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf

collectGarbage()

# EA_dataset = rbind(EA_dataset[EA_dataset$module!='other',], EA_dataset[EA_dataset$module=='other',][sample(sum(EA_dataset$module=='other'), 1000),])

# Prepare datasets
GO_col = buildGOcollection(organism = 'human', verbose = 0)
## Loading required package: org.Hs.eg.db
## 
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
internal_col = internalCollection(organism = 'human')
MillerAIBS_col = MillerAIBSCollection(organism = 'human')
BrainDisease_col = BrainDiseaseCollection(organism = 'human')
combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)

# Print collections used
cat('Using collections: ')
## Using collections:
knownGroups(combined_col, sortBy = 'size')
##  [1] "GO"                                         
##  [2] "GO.BP"                                      
##  [3] "GO.MF"                                      
##  [4] "GO.CC"                                      
##  [5] "JA Miller at AIBS"                          
##  [6] "Chip-X enrichment analysis (ChEA)"          
##  [7] "Brain"                                      
##  [8] "JAM"                                        
##  [9] "Prenatal brain"                             
## [10] "Brain region markers"                       
## [11] "Cortex"                                     
## [12] "Brain region marker enriched gene sets"     
## [13] "WGCNA"                                      
## [14] "BrainRegionMarkers"                         
## [15] "BrainRegionMarkers.HBA"                     
## [16] "BrainRegionMarkers.HBA.localMarker(top200)" 
## [17] "Postnatal brain"                            
## [18] "ImmunePathways"                             
## [19] "Markers of cortex layers"                   
## [20] "BrainLists"                                 
## [21] "Cell type markers"                          
## [22] "Germinal brain"                             
## [23] "BrainRegionMarkers.HBA.globalMarker(top200)"
## [24] "Accelerated evolution"                      
## [25] "Postmitotic brain"                          
## [26] "BrainLists.Blalock_AD"                      
## [27] "BrainLists.DiseaseGenes"                    
## [28] "BloodAtlases"                               
## [29] "Verge Disease Genes"                        
## [30] "BloodAtlases.Whitney"                       
## [31] "BrainLists.JAXdiseaseGene"                  
## [32] "BrainLists.MO"                              
## [33] "Age-associated genes"                       
## [34] "BrainLists.Lu_Aging"                        
## [35] "Cell type marker enriched gene sets"        
## [36] "BrainLists.CA1vsCA3"                        
## [37] "BrainLists.MitochondrialType"               
## [38] "BrainLists.MO.2+_26Mar08"                   
## [39] "BrainLists.MO.Sugino"                       
## [40] "BloodAtlases.Gnatenko2"                     
## [41] "BloodAtlases.Kabanova"                      
## [42] "BrainLists.Voineagu"                        
## [43] "StemCellLists"                              
## [44] "StemCellLists.Lee"
# Perform Enrichment Analysis
enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
                                refCollection = combined_col, useBackground = 'given', 
                                threshold = 1e-4, thresholdType = 'Bonferroni',
                                getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
##  enrichmentAnalysis: preparing data..
##   ..working on label set 1 ..


Results


kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR,
                    effectiveClassSize, effectiveSetSize, nCommonGenes),
      caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
Enriched terms for module #C19B00 (MTcor = 0.9999)
dataSetID shortDataSetName inGroups Bonferroni FDR effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000084 Bergman glia JA Miller at AIBS|Brain|Postnatal brain|Cell type markers 0.00e+00 0e+00 2153 1211 341
JAM:002807 Cingulate Gyrus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2153 151 95
JAMiller.AIBS.000085 Cerebellar astrocytes JA Miller at AIBS|Brain|Postnatal brain|Cell type markers 0.00e+00 0e+00 2153 1429 378
JAMiller.AIBS.000143 Lowest in CP of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 2153 1433 371
JAMiller.AIBS.000009 VZ markers at 15-16 post-conception weeks JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Germinal brain 0.00e+00 0e+00 2153 1238 328
JAMiller.AIBS.000097 Cortical astrocytes JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 0.00e+00 0e+00 2153 2289 505
JAM:003078 Temporal Lobe_IN_Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2153 163 81
JAMiller.AIBS.000148 Highest in VZ of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain 0.00e+00 0e+00 2153 672 195
JAMiller.AIBS.000112 HippocampusWGCNA greenyellow SGZenriched astrocyte JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.00e+00 0e+00 2153 245 96
JAM:002981 Parahippocampal Gyrus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2153 153 72
JAMiller.AIBS.000098 Cortical oligodendrocytes (Olig2) JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 0.00e+00 0e+00 2153 2254 447
JAM:002731 Amygdala JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2153 160 71
JAM:002773 upAging_copperHomeostatisMT1 JAM|BrainLists|BrainLists.Lu_Aging 0.00e+00 0e+00 2153 119 59
JAMiller.AIBS.000154 Highest in VZ of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain 0.00e+00 0e+00 2153 1704 356
JAMiller.AIBS.000193 CortexWGCNA midfetal M23 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.00e+00 0e+00 2153 998 232
JAM:002909 Insula JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2153 150 63
JAMiller.AIBS.000082 Cerebellar oligodendrocytes (Olig2) JA Miller at AIBS|Brain|Postnatal brain|Cell type markers 0.00e+00 0e+00 2153 1489 306
JAM:002852 frontal part, superior bank of gyrus_IN_Cingulate Gyrus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 2153 151 61
JAMiller.AIBS.000506 Genes bound by SUZ12 in MOUSE MESC from PMID 20075857 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 2153 3398 584
JAMiller.AIBS.000106 Genes enriched in the hippocampal SGZ in mouse JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers 0.00e+00 0e+00 2153 336 96
JAMiller.AIBS.000126 Subependymal zone parenchymalAstrocytesFromDiencephalon(GFAP+/inDiencephalon) JA Miller at AIBS|Brain|Postnatal brain|Cell type markers 0.00e+00 0e+00 2153 107 45
JAMiller.AIBS.000466 Genes bound by SMARCA4 in MOUSE OLIGODENDROCYTES from PMID 23332759 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 2153 2052 378
JAMiller.AIBS.000064 CortexWGCNA 15-21 post-conception weeks C38 cellCycle SZ/VZenriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.00e+00 0e+00 2153 528 130
JAMiller.AIBS.000364 Genes bound by MTF2 in MOUSE MESC from PMID 20144788 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 2153 2301 414
JAMiller.AIBS.000204 RegionalWGCNA midfetal M34 JA Miller at AIBS|Brain|Prenatal brain|WGCNA 0.00e+00 0e+00 2153 417 109
JAMiller.AIBS.000504 Genes bound by SUZ12 in mouse MESC from PMID 18692474 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 2153 1372 270
JAMiller.AIBS.000537 Genes bound by TP63 in HUMAN EP156T from PMID 23658742 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.00e-07 0e+00 2153 2813 487
JAMiller.AIBS.000505 Genes bound by SUZ12 in MOUSE MESC from PMID 18974828 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.00e-07 0e+00 2153 1396 272
JAM:003117 noChangeAD_antigenProcessing_ribosome JAM|BrainLists|BrainLists.Blalock_AD 2.00e-07 0e+00 2153 244 73
JAMiller.AIBS.000075 GerminalZonesWGCNA 15-21 post-conception weeks G7 VZ-enriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA|Germinal brain 9.00e-07 0e+00 2153 609 139
GO:0007423 sensory organ development GO|GO.BP 1.10e-06 0e+00 2153 480 116
JAMiller.AIBS.000138 VZ/SZ/IZ enriched in E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain 2.40e-06 0e+00 2153 333 88
GO:0007154 cell communication GO|GO.BP 2.90e-06 0e+00 2153 5344 832
JAMiller.AIBS.000110 HippocampusWGCNA cyan SGZenriched upAge glia/gliogenesis JA Miller at AIBS|Brain|Postnatal brain|WGCNA 3.20e-06 0e+00 2153 151 51
GO:0005886 plasma membrane GO|GO.CC 7.10e-06 1e-07 2153 4326 689
JAMiller.AIBS.000002 SZo markers at 21 post-conception weeks JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Germinal brain 1.30e-05 1e-07 2153 254 71
GO:0023052 signaling GO|GO.BP 1.87e-05 2e-07 2153 5329 824
JAMiller.AIBS.000163 Genes decreasing in fetal and increasing in aging JA Miller at AIBS|Brain|Age-associated genes|Cortex 2.94e-05 2e-07 2153 65 29
GO:0071944 cell periphery GO|GO.CC 3.12e-05 2e-07 2153 4431 699
GO:0009653 anatomical structure morphogenesis GO|GO.BP 3.15e-05 2e-07 2153 2314 398
GO:0048869 cellular developmental process GO|GO.BP 3.47e-05 3e-07 2153 3653 590
JAMiller.AIBS.000203 RegionalWGCNA midfetal M33 Striatum+Thalamus+Cerebellum JA Miller at AIBS|Brain|Prenatal brain|WGCNA 3.76e-05 3e-07 2153 461 108
JAMiller.AIBS.000062 CortexWGCNA 15-21 post-conception weeks C36 SZ/VZenriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 4.77e-05 3e-07 2153 152 49
GO:0003008 system process GO|GO.BP 6.15e-05 4e-07 2153 1528 279
GO:0048468 cell development GO|GO.BP 7.27e-05 5e-07 2153 1929 339
JAMiller.AIBS.000076 GerminalZonesWGCNA 15-21 post-conception weeks G8 VZ-enriched JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA|Germinal brain 8.01e-05 5e-07 2153 793 163
GO:0007507 heart development GO|GO.BP 8.84e-05 6e-07 2153 502 114
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR,
                    effectiveClassSize, effectiveSetSize, nCommonGenes),
      caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
Enriched terms for module #00C1A2 (MTcor = -0.9999)
dataSetID shortDataSetName inGroups Bonferroni FDR effectiveClassSize effectiveSetSize nCommonGenes
JAM:002744 Autism_differential_expression_across_at_least_one_comparison JAM|BrainLists|BrainLists.Voineagu 0.00e+00 0e+00 1327 760 166
JAMiller.AIBS.000142 Highest in CP of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 1327 1211 208
JAM:002967 Occipital Lobe_IN_Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 1327 155 58
JAMiller.AIBS.000506 Genes bound by SUZ12 in MOUSE MESC from PMID 20075857 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1327 3398 418
JAM:002985 Parietal Lobe_IN_Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 1327 120 48
JAM:002986 parietal part, inferior bank of gyrus_IN_Cingulate Gyrus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 1327 122 48
JAM:002824 Dentate Nucleus_IN_Cerebellar Nucleus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 1327 166 55
JAMiller.AIBS.000569 WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.00e+00 0e+00 1327 4047 468
JAMiller.AIBS.000155 Lowest in VZ of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 1327 1657 236
JAMiller.AIBS.000150 Highest in CP of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 1327 1265 194
GO:0005215 transporter activity GO|GO.MF 0.00e+00 0e+00 1327 997 153
GO:0022857 transmembrane transporter activity GO|GO.MF 0.00e+00 0e+00 1327 915 142
GO:0097458 neuron part GO|GO.CC 0.00e+00 0e+00 1327 1611 214
GO:0015318 inorganic molecular entity transmembrane transporter activity GO|GO.MF 0.00e+00 0e+00 1327 720 116
GO:0015075 ion transmembrane transporter activity GO|GO.MF 0.00e+00 0e+00 1327 773 122
GO:0045202 synapse GO|GO.CC 1.00e-07 0e+00 1327 1112 156
GO:0022890 inorganic cation transmembrane transporter activity GO|GO.MF 3.00e-07 0e+00 1327 525 90
JAM:003016 downAD_synapticTransmission JAM|BrainLists|BrainLists.Blalock_AD 3.00e-07 0e+00 1327 89 30
JAMiller.AIBS.000504 Genes bound by SUZ12 in mouse MESC from PMID 18692474 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 5.00e-07 0e+00 1327 1372 181
GO:0006811 ion transport GO|GO.BP 6.00e-07 0e+00 1327 1483 192
GO:0008324 cation transmembrane transporter activity GO|GO.MF 6.00e-07 0e+00 1327 566 94
GO:0015079 potassium ion transmembrane transporter activity GO|GO.MF 7.00e-07 0e+00 1327 145 39
GO:0044456 synapse part GO|GO.CC 7.00e-07 0e+00 1327 889 130
JAMiller.AIBS.000078 Cerebellar granule cells JA Miller at AIBS|Brain|Postnatal brain|Cell type markers 1.00e-06 0e+00 1327 1513 194
GO:0034220 ion transmembrane transport GO|GO.BP 1.00e-06 0e+00 1327 1054 147
JAM:003072 Tail of Caudate Nucleus_IN_Striatum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.10e-06 0e+00 1327 160 41
JAMiller.AIBS.000123 HippocampusWGCNA turquoise DGenriched upAge JA Miller at AIBS|Brain|Postnatal brain|WGCNA 1.10e-06 0e+00 1327 1103 152
GO:0015672 monovalent inorganic cation transport GO|GO.BP 1.30e-06 0e+00 1327 455 80
GO:1902495 transmembrane transporter complex GO|GO.CC 1.30e-06 0e+00 1327 296 60
GO:1990351 transporter complex GO|GO.CC 1.40e-06 0e+00 1327 304 61
GO:0046873 metal ion transmembrane transporter activity GO|GO.MF 1.50e-06 0e+00 1327 391 72
GO:0007268 chemical synaptic transmission GO|GO.BP 2.20e-06 0e+00 1327 640 101
GO:0098916 anterograde trans-synaptic signaling GO|GO.BP 2.20e-06 0e+00 1327 640 101
GO:0006813 potassium ion transport GO|GO.BP 2.50e-06 0e+00 1327 212 48
JAMiller.AIBS.000364 Genes bound by MTF2 in MOUSE MESC from PMID 20144788 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 3.40e-06 0e+00 1327 2301 268
GO:0099536 synaptic signaling GO|GO.BP 3.50e-06 0e+00 1327 654 102
GO:0099537 trans-synaptic signaling GO|GO.BP 4.50e-06 0e+00 1327 648 101
JAM:002847 facial motor nucleus_IN_Pontine Tegmentum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 6.30e-06 1e-07 1327 155 39
JAM:003047 Somatic JAM|BrainLists|BrainLists.MitochondrialType 6.40e-06 1e-07 1327 142 37
GO:0071804 cellular potassium ion transport GO|GO.BP 7.60e-06 1e-07 1327 190 44
GO:0071805 potassium ion transmembrane transport GO|GO.BP 7.60e-06 1e-07 1327 190 44
GO:0022839 ion gated channel activity GO|GO.MF 7.90e-06 1e-07 1327 301 59
JAM:002763 downAD_metalIonTransport_glycoprotein JAM|BrainLists|BrainLists.Blalock_AD 1.05e-05 1e-07 1327 280 56
GO:0005887 integral component of plasma membrane GO|GO.CC 1.08e-05 1e-07 1327 1393 178
GO:0015077 monovalent inorganic cation transmembrane transporter activity GO|GO.MF 1.11e-05 1e-07 1327 327 62
GO:0005249 voltage-gated potassium channel activity GO|GO.MF 1.16e-05 1e-07 1327 78 26
GO:0005244 voltage-gated ion channel activity GO|GO.MF 1.30e-05 1e-07 1327 179 42
GO:0022832 voltage-gated channel activity GO|GO.MF 1.30e-05 1e-07 1327 179 42
GO:0022803 passive transmembrane transporter activity GO|GO.MF 1.48e-05 1e-07 1327 402 71
GO:0034702 ion channel complex GO|GO.CC 1.54e-05 1e-07 1327 275 55
JAMiller.AIBS.000570 WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 1.93e-05 2e-07 1327 480 80
JAM:002805 Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 2.12e-05 2e-07 1327 161 39
GO:0005216 ion channel activity GO|GO.MF 2.58e-05 2e-07 1327 374 67
GO:0031226 intrinsic component of plasma membrane GO|GO.CC 2.64e-05 2e-07 1327 1459 183
GO:0022836 gated channel activity GO|GO.MF 2.65e-05 2e-07 1327 310 59
GO:0098660 inorganic ion transmembrane transport GO|GO.BP 2.84e-05 2e-07 1327 742 109
GO:0015267 channel activity GO|GO.MF 3.36e-05 3e-07 1327 401 70
GO:0043005 neuron projection GO|GO.CC 3.76e-05 3e-07 1327 1215 158
GO:0097060 synaptic membrane GO|GO.CC 4.05e-05 3e-07 1327 411 71
GO:0071944 cell periphery GO|GO.CC 5.44e-05 4e-07 1327 4431 453
JAMiller.AIBS.000134 Layer4 enriched in adult macaque cortex JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers|Cortex 5.63e-05 4e-07 1327 139 35
GO:0005886 plasma membrane GO|GO.CC 7.66e-05 5e-07 1327 4326 443
GO:0022838 substrate-specific channel activity GO|GO.MF 8.14e-05 5e-07 1327 384 67
JAM:002927 Long Insular Gyri_IN_Insula JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 9.22e-05 6e-07 1327 109 30

Save Enrichment Analysis results

save(enrichment, file='./../Data/enrichmentAnalysis.RData')



Session info

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.10.0                      
##  [2] BrainDiseaseCollection_1.00              
##  [3] anRichment_1.01-2                        
##  [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
##  [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2  
##  [6] GenomicFeatures_1.38.2                   
##  [7] GenomicRanges_1.38.0                     
##  [8] GenomeInfoDb_1.22.0                      
##  [9] anRichmentMethods_0.90-1                 
## [10] WGCNA_1.68                               
## [11] fastcluster_1.1.25                       
## [12] dynamicTreeCut_1.63-1                    
## [13] GO.db_3.10.0                             
## [14] AnnotationDbi_1.48.0                     
## [15] IRanges_2.20.2                           
## [16] S4Vectors_0.24.3                         
## [17] Biobase_2.46.0                           
## [18] BiocGenerics_0.32.0                      
## [19] biomaRt_2.42.0                           
## [20] knitr_1.24                               
## [21] doParallel_1.0.15                        
## [22] iterators_1.0.12                         
## [23] foreach_1.4.7                            
## [24] polycor_0.7-10                           
## [25] expss_0.10.1                             
## [26] GGally_1.4.0                             
## [27] gridExtra_2.3                            
## [28] viridis_0.5.1                            
## [29] viridisLite_0.3.0                        
## [30] RColorBrewer_1.1-2                       
## [31] dendextend_1.13.3                        
## [32] plotly_4.9.2                             
## [33] glue_1.3.1                               
## [34] reshape2_1.4.3                           
## [35] forcats_0.4.0                            
## [36] stringr_1.4.0                            
## [37] dplyr_0.8.3                              
## [38] purrr_0.3.3                              
## [39] readr_1.3.1                              
## [40] tidyr_1.0.2                              
## [41] tibble_2.1.3                             
## [42] ggplot2_3.2.1                            
## [43] tidyverse_1.3.0                          
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.2-0                 BiocFileCache_1.10.2       
##   [5] plyr_1.8.5                  lazyeval_0.2.2             
##   [7] splines_3.6.0               crosstalk_1.0.0            
##   [9] BiocParallel_1.20.1         robust_0.4-18.2            
##  [11] digest_0.6.24               htmltools_0.4.0            
##  [13] fansi_0.4.1                 magrittr_1.5               
##  [15] checkmate_1.9.4             memoise_1.1.0              
##  [17] fit.models_0.5-14           cluster_2.0.8              
##  [19] annotate_1.64.0             Biostrings_2.54.0          
##  [21] modelr_0.1.5                matrixStats_0.55.0         
##  [23] askpass_1.1                 prettyunits_1.0.2          
##  [25] colorspace_1.4-1            blob_1.2.1                 
##  [27] rvest_0.3.5                 rappdirs_0.3.1             
##  [29] rrcov_1.4-7                 haven_2.2.0                
##  [31] xfun_0.8                    crayon_1.3.4               
##  [33] RCurl_1.95-4.12             jsonlite_1.6               
##  [35] genefilter_1.68.0           impute_1.60.0              
##  [37] survival_2.44-1.1           gtable_0.3.0               
##  [39] zlibbioc_1.32.0             XVector_0.26.0             
##  [41] DelayedArray_0.12.2         DEoptimR_1.0-8             
##  [43] scales_1.1.0                mvtnorm_1.0-11             
##  [45] DBI_1.1.0                   Rcpp_1.0.3                 
##  [47] xtable_1.8-4                progress_1.2.2             
##  [49] htmlTable_1.13.1            foreign_0.8-71             
##  [51] bit_1.1-15.2                preprocessCore_1.48.0      
##  [53] Formula_1.2-3               htmlwidgets_1.5.1          
##  [55] httr_1.4.1                  ellipsis_0.3.0             
##  [57] acepack_1.4.1               farver_2.0.3               
##  [59] pkgconfig_2.0.3             reshape_0.8.8              
##  [61] XML_3.99-0.3                nnet_7.3-12                
##  [63] dbplyr_1.4.2                locfit_1.5-9.1             
##  [65] later_1.0.0                 labeling_0.3               
##  [67] tidyselect_0.2.5            rlang_0.4.4                
##  [69] munsell_0.5.0               cellranger_1.1.0           
##  [71] tools_3.6.0                 cli_2.0.1                  
##  [73] generics_0.0.2              RSQLite_2.2.0              
##  [75] broom_0.5.4                 fastmap_1.0.1              
##  [77] evaluate_0.14               yaml_2.2.0                 
##  [79] bit64_0.9-7                 fs_1.3.1                   
##  [81] robustbase_0.93-5           nlme_3.1-139               
##  [83] mime_0.9                    xml2_1.2.2                 
##  [85] compiler_3.6.0              rstudioapi_0.10            
##  [87] curl_4.3                    reprex_0.3.0               
##  [89] geneplotter_1.64.0          pcaPP_1.9-73               
##  [91] stringi_1.4.6               highr_0.8                  
##  [93] lattice_0.20-38             Matrix_1.2-17              
##  [95] vctrs_0.2.2                 pillar_1.4.3               
##  [97] lifecycle_0.1.0             data.table_1.12.8          
##  [99] bitops_1.0-6                httpuv_1.5.2               
## [101] rtracklayer_1.46.0          R6_2.4.1                   
## [103] latticeExtra_0.6-28         promises_1.1.0             
## [105] codetools_0.2-16            MASS_7.3-51.4              
## [107] assertthat_0.2.1            SummarizedExperiment_1.16.1
## [109] DESeq2_1.26.0               openssl_1.4.1              
## [111] withr_2.1.2                 GenomicAlignments_1.22.1   
## [113] Rsamtools_2.2.2             GenomeInfoDbData_1.2.2     
## [115] hms_0.5.3                   grid_3.6.0                 
## [117] rpart_4.1-15                rmarkdown_1.14             
## [119] Cairo_1.5-10                shiny_1.4.0                
## [121] lubridate_1.7.4             base64enc_0.1-3